{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# Upload Datasets to HuggingFace\n",
        "\n",
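        "This notebook uploads the per-domain datasets (banking, healthcare, insurance, investment, telecom) from the `../data` folder to the Hugging Face Hub as `galileo-ai/agent-leaderboard-v2`. Each file type (`adaptive_tool_use`, `personas`, `tools`) is pushed as a dataset config, and each domain becomes a split within that config.\n"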
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "execution_count": 13,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import json\n",
        "import os\n",
        "from glob import glob\n",
        "from pprint import pprint\n",
        "\n",
        "import datasets\n",
        "import pandas as pd\n",
        "from datasets import load_dataset\n",
        "from dotenv import load_dotenv\n",
        "from tqdm.auto import tqdm as notebook_tqdm\n",
        "\n",
        "# Load environment variables from ../.env; HF_TOKEN is read from the environment\n",
        "# further down via os.getenv. The call's return value is shown as the cell output.\n",
        "load_dotenv(\"../.env\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Found domains:\n",
            "  - banking\n",
            "    Files: ['tools.json', 'adaptive_tool_use.json', 'personas.json']\n",
            "\n",
            "  - healthcare\n",
            "    Files: ['tools.json', 'adaptive_tool_use.json', 'personas.json']\n",
            "\n",
            "  - insurance\n",
            "    Files: ['tools.json', 'adaptive_tool_use.json', 'personas.json']\n",
            "\n",
            "  - investment\n",
            "    Files: ['tools.json', 'adaptive_tool_use.json', 'personas.json']\n",
            "\n",
            "  - telecom\n",
            "    Files: ['tools.json', 'adaptive_tool_use.json', 'personas.json']\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Find all domain directories (one sub-directory of ../data per domain).\n",
        "# Directory names are extracted with os.path rather than by splitting on '/',\n",
        "# so the filter also works where glob returns backslash separators, and the\n",
        "# exclusion is an exact name match instead of a suffix match (a suffix match\n",
        "# would also drop a domain such as 'my_datasets').\n",
        "NON_DOMAIN_DIRS = {'datasets', 'results'}\n",
        "\n",
        "domain_dirs = sorted(\n",
        "    d for d in glob('../data/*/')\n",
        "    if os.path.basename(os.path.normpath(d)) not in NON_DOMAIN_DIRS\n",
        ")\n",
        "\n",
        "print(\"Found domains:\")\n",
        "for domain_dir in domain_dirs:\n",
        "    domain_name = os.path.basename(os.path.normpath(domain_dir))\n",
        "    print(f\"  - {domain_name}\")\n",
        "\n",
        "    # Check what files are available in each domain; sorted because glob\n",
        "    # returns entries in arbitrary, filesystem-dependent order\n",
        "    json_files = sorted(glob(os.path.join(domain_dir, '*.json')))\n",
        "    if json_files:\n",
        "        print(f\"    Files: {[os.path.basename(f) for f in json_files]}\")\n",
        "    else:\n",
        "        print(\"    No JSON files found\")\n",
        "    print()\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 15,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Starting file-type-based upload...\n",
            "Repository: galileo-ai/agent-leaderboard-v2\n",
            "==================================================\n",
            "Note: Using file-type as config and domain as split to handle different schemas\n",
            "\n",
            "Scanning domain: banking\n",
            "  ✓ Found: adaptive_tool_use.json\n",
            "  ✓ Found: personas.json\n",
            "  ✓ Found: tools.json\n",
            "Scanning domain: healthcare\n",
            "  ✓ Found: adaptive_tool_use.json\n",
            "  ✓ Found: personas.json\n",
            "  ✓ Found: tools.json\n",
            "Scanning domain: insurance\n",
            "  ✓ Found: adaptive_tool_use.json\n",
            "  ✓ Found: personas.json\n",
            "  ✓ Found: tools.json\n",
            "Scanning domain: investment\n",
            "  ✓ Found: adaptive_tool_use.json\n",
            "  ✓ Found: personas.json\n",
            "  ✓ Found: tools.json\n",
            "Scanning domain: telecom\n",
            "  ✓ Found: adaptive_tool_use.json\n",
            "  ✓ Found: personas.json\n",
            "  ✓ Found: tools.json\n",
            "\n",
            "==================================================\n",
            "Starting uploads by file type...\n"
          ]
        }
      ],
      "source": [
        "# Expected files for each domain\n",
        "EXPECTED_FILES = ['adaptive_tool_use.json', 'personas.json', 'tools.json']\n",
        "\n",
        "# Repository configuration\n",
        "REPO_NAME = \"galileo-ai/agent-leaderboard-v2\"\n",
        "# None when HF_TOKEN is not set in the environment / .env file.\n",
        "# NOTE(review): per the datasets docs, push_to_hub then falls back to locally\n",
        "# saved Hugging Face credentials - confirm that is acceptable for this repo.\n",
        "HF_TOKEN = os.getenv(\"HF_TOKEN\")\n",
        "\n",
        "print(\"Starting file-type-based upload...\")\n",
        "print(f\"Repository: {REPO_NAME}\")\n",
        "print(\"=\" * 50)\n",
        "print(\"Note: Using file-type as config and domain as split to handle different schemas\")\n",
        "print()\n",
        "\n",
        "# Collect data by file type across all domains:\n",
        "#   {file_type: {domain_name: parsed JSON content}}\n",
        "# The keys are derived from EXPECTED_FILES so the two can never drift apart\n",
        "# (a file listed there without a matching key would raise KeyError below).\n",
        "file_type_data = {os.path.splitext(filename)[0]: {} for filename in EXPECTED_FILES}\n",
        "\n",
        "# First, collect all data organized by file type\n",
        "for domain_dir in domain_dirs:\n",
        "    # basename/normpath instead of split('/') so the name is extracted\n",
        "    # correctly regardless of the path separator glob returned\n",
        "    domain_name = os.path.basename(os.path.normpath(domain_dir))\n",
        "    print(f\"Scanning domain: {domain_name}\")\n",
        "\n",
        "    for filename in EXPECTED_FILES:\n",
        "        file_path = os.path.join(domain_dir, filename)\n",
        "        file_type = os.path.splitext(filename)[0]\n",
        "\n",
        "        if not os.path.exists(file_path):\n",
        "            print(f\"  ✗ Missing: {filename}\")\n",
        "            continue\n",
        "\n",
        "        print(f\"  ✓ Found: {filename}\")\n",
        "        try:\n",
        "            with open(file_path, 'r', encoding='utf-8') as f:\n",
        "                json_data = json.load(f)\n",
        "        except (OSError, ValueError) as e:\n",
        "            # OSError: file could not be opened/read.\n",
        "            # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.\n",
        "            # Anything else is a programming error and should surface.\n",
        "            print(f\"  ✗ Error reading {filename}: {e}\")\n",
        "        else:\n",
        "            file_type_data[file_type][domain_name] = json_data\n",
        "\n",
        "print(\"\\n\" + \"=\" * 50)\n",
        "print(\"Starting uploads by file type...\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 16,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "Processing file type: adaptive_tool_use\n",
            "------------------------------\n",
            "  Uploading banking domain as split 'banking'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 689.63ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:02<00:00,  2.76s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading healthcare domain as split 'healthcare'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 389.99ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:02<00:00,  2.86s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading insurance domain as split 'insurance'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 551.59ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:02<00:00,  2.09s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading investment domain as split 'investment'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 625.36ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.50s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading telecom domain as split 'telecom'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 697.31ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.63s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "\n",
            "Processing file type: personas\n",
            "------------------------------\n",
            "  Uploading banking domain as split 'banking'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 776.29ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.39s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading healthcare domain as split 'healthcare'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 933.10ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.94s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading insurance domain as split 'insurance'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1312.77ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.30s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading investment domain as split 'investment'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1073.81ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.33s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "  Uploading telecom domain as split 'telecom'...\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1043.88ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.38s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 100 records\n",
            "\n",
            "Processing file type: tools\n",
            "------------------------------\n",
            "  Uploading banking domain as split 'banking'...\n",
            "    Converting nested data to JSON strings for tools...\n",
            "      ✓ Converted properties to JSON strings\n",
            "      ✓ Converted response_schema to JSON strings\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 969.11ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.44s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 20 records\n",
            "  Uploading healthcare domain as split 'healthcare'...\n",
            "    Converting nested data to JSON strings for tools...\n",
            "      ✓ Converted properties to JSON strings\n",
            "      ✓ Converted response_schema to JSON strings\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 936.23ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.34s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 20 records\n",
            "  Uploading insurance domain as split 'insurance'...\n",
            "    Converting nested data to JSON strings for tools...\n",
            "      ✓ Converted properties to JSON strings\n",
            "      ✓ Converted response_schema to JSON strings\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 405.99ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.42s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 20 records\n",
            "  Uploading investment domain as split 'investment'...\n",
            "    Converting nested data to JSON strings for tools...\n",
            "      ✓ Converted properties to JSON strings\n",
            "      ✓ Converted response_schema to JSON strings\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 825.16ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.46s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 20 records\n",
            "  Uploading telecom domain as split 'telecom'...\n",
            "    Converting nested data to JSON strings for tools...\n",
            "      ✓ Converted properties to JSON strings\n",
            "      ✓ Converted response_schema to JSON strings\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 1103.76ba/s]\n",
            "Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.39s/ shards]\n"
          ]
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "    ✓ Successfully uploaded 20 records\n",
            "\n",
            "==================================================\n",
            "Upload completed with JSON string conversion for tools!\n"
          ]
        }
      ],
      "source": [
        "# Upload every (file type, domain) pair: file type -> config, domain -> split.\n",
        "# Nested tool schemas are stored as JSON strings before upload.\n",
        "# Failures are collected so the final summary reflects what actually happened\n",
        "# instead of always reporting success.\n",
        "failed_uploads = []  # (file_type, domain_name, error message) per failed push\n",
        "\n",
        "for file_type, domain_data in file_type_data.items():\n",
        "    if not domain_data:\n",
        "        print(f\"\\nSkipping {file_type} - no data found\")\n",
        "        continue\n",
        "\n",
        "    print(f\"\\nProcessing file type: {file_type}\")\n",
        "    print(\"-\" * 30)\n",
        "\n",
        "    # Upload each domain as a split for this file type\n",
        "    for domain_name, json_data in domain_data.items():\n",
        "        print(f\"  Uploading {domain_name} domain as split '{domain_name}'...\")\n",
        "\n",
        "        try:\n",
        "            # Convert to pandas DataFrame; a single object is wrapped in a list\n",
        "            records = json_data if isinstance(json_data, list) else [json_data]\n",
        "            df = pd.DataFrame(records)\n",
        "\n",
        "            # Special handling for tools: convert nested dictionaries to JSON strings\n",
        "            if file_type == 'tools':\n",
        "                print(\"    Converting nested data to JSON strings for tools...\")\n",
        "                # Convert 'properties' and 'response_schema' to JSON strings\n",
        "                for col in ['properties', 'response_schema']:\n",
        "                    if col in df.columns:\n",
        "                        df[col] = df[col].apply(lambda x: json.dumps(x) if isinstance(x, dict) else x)\n",
        "                        print(f\"      ✓ Converted {col} to JSON strings\")\n",
        "\n",
        "            # Clean up any unwanted columns\n",
        "            if \"index\" in df.columns:\n",
        "                del df[\"index\"]\n",
        "            # \"id\" is dropped only when more than two columns remain at this point\n",
        "            if \"id\" in df.columns and len(df.columns) > 2:\n",
        "                del df[\"id\"]\n",
        "\n",
        "            # Create dataset\n",
        "            dataset = datasets.Dataset.from_pandas(df)\n",
        "\n",
        "            # Upload to HuggingFace\n",
        "            dataset.push_to_hub(\n",
        "                REPO_NAME,\n",
        "                config_name=file_type,    # File type becomes the config\n",
        "                split=domain_name,        # Domain becomes the split\n",
        "                token=HF_TOKEN\n",
        "            )\n",
        "\n",
        "            print(f\"    ✓ Successfully uploaded {len(df)} records\")\n",
        "\n",
        "        except Exception as e:\n",
        "            # Keep going so one bad split does not block the others, but\n",
        "            # remember the failure for the summary below\n",
        "            failed_uploads.append((file_type, domain_name, str(e)))\n",
        "            print(f\"    ✗ Error uploading {domain_name}: {str(e)}\")\n",
        "\n",
        "print(\"\\n\" + \"=\" * 50)\n",
        "if failed_uploads:\n",
        "    print(f\"Upload finished with {len(failed_uploads)} failure(s):\")\n",
        "    for failed_type, failed_domain, error_message in failed_uploads:\n",
        "        print(f\"  ✗ {failed_type}/{failed_domain}: {error_message}\")\n",
        "else:\n",
        "    print(\"Upload completed with JSON string conversion for tools!\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 17,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Testing download functionality...\n",
            "========================================\n",
            "To test downloads, run these commands after the upload completes:\n",
            "\n",
            "# Example downloads:\n",
            "banking_tools = load_dataset(\"galileo-ai/agent-leaderboard-v2\", \"tools\", split=\"banking\")\n",
            "investment_personas = load_dataset(\"galileo-ai/agent-leaderboard-v2\", \"personas\", split=\"investment\")\n",
            "healthcare_scenarios = load_dataset(\"galileo-ai/agent-leaderboard-v2\", \"adaptive_tool_use\", split=\"healthcare\")\n",
            "\n",
            "# Load all domains for a specific file type:\n",
            "all_tools = load_dataset(\"galileo-ai/agent-leaderboard-v2\", \"tools\")\n",
            "# This gives access to: all_tools['banking'], all_tools['investment'], etc.\n",
            "\n",
            "✅ This structure solves the schema mismatch issue!\n",
            "✅ Each file type maintains its own schema within its config\n",
            "✅ Domain information is preserved in both split names and the 'domain' column\n"
          ]
        }
      ],
      "source": [
        "# Print ready-to-run download commands for the data uploaded above.\n",
        "# Nothing is downloaded in this cell; the following cell runs the three\n",
        "# example downloads for real.\n",
        "print(\"Testing download functionality...\")\n",
        "print(\"=\" * 40)\n",
        "\n",
        "print(\"To test downloads, run these commands after the upload completes:\")\n",
        "print()\n",
        "print(\"# Example downloads:\")\n",
        "print(f'banking_tools = load_dataset(\"{REPO_NAME}\", \"tools\", split=\"banking\")')\n",
        "print(f'investment_personas = load_dataset(\"{REPO_NAME}\", \"personas\", split=\"investment\")')\n",
        "print(f'healthcare_scenarios = load_dataset(\"{REPO_NAME}\", \"adaptive_tool_use\", split=\"healthcare\")')\n",
        "print()\n",
        "print(\"# Load all domains for a specific file type:\")\n",
        "print(f'all_tools = load_dataset(\"{REPO_NAME}\", \"tools\")')\n",
        "print(\"# This gives access to: all_tools['banking'], all_tools['investment'], etc.\")\n",
        "print()\n",
        "print(\"✅ This structure solves the schema mismatch issue!\")\n",
        "print(\"✅ Each file type maintains its own schema within its config\")\n",
        "# The upload code adds no 'domain' column, so the split name is the only\n",
        "# place the domain is recorded.\n",
        "print(\"✅ Domain information is preserved in the split names\")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 18,
      "metadata": {},
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Generating banking split: 100%|██████████| 20/20 [00:00<00:00, 3822.04 examples/s]\n",
            "Generating healthcare split: 100%|██████████| 20/20 [00:00<00:00, 5304.88 examples/s]\n",
            "Generating insurance split: 100%|██████████| 20/20 [00:00<00:00, 6377.23 examples/s]\n",
            "Generating investment split: 100%|██████████| 20/20 [00:00<00:00, 8113.56 examples/s]\n",
            "Generating telecom split: 100%|██████████| 20/20 [00:00<00:00, 6580.85 examples/s]\n",
            "Generating banking split: 100%|██████████| 100/100 [00:00<00:00, 20455.03 examples/s]\n",
            "Generating healthcare split: 100%|██████████| 100/100 [00:00<00:00, 23718.07 examples/s]\n",
            "Generating insurance split: 100%|██████████| 100/100 [00:00<00:00, 32403.46 examples/s]\n",
            "Generating investment split: 100%|██████████| 100/100 [00:00<00:00, 40209.99 examples/s]\n",
            "Generating telecom split: 100%|██████████| 100/100 [00:00<00:00, 51787.92 examples/s]\n",
            "Generating banking split: 100%|██████████| 100/100 [00:00<00:00, 19688.80 examples/s]\n",
            "Generating healthcare split: 100%|██████████| 100/100 [00:00<00:00, 29645.91 examples/s]\n",
            "Generating insurance split: 100%|██████████| 100/100 [00:00<00:00, 31847.41 examples/s]\n",
            "Generating investment split: 100%|██████████| 100/100 [00:00<00:00, 40852.28 examples/s]\n",
            "Generating telecom split: 100%|██████████| 100/100 [00:00<00:00, 54613.33 examples/s]\n"
          ]
        }
      ],
      "source": [
        "# Download one split from each config as a spot check of the upload.\n",
        "# REPO_NAME is used (rather than a repeated literal) so this always checks\n",
        "# the same repository the upload cell pushed to.\n",
        "banking_tools = load_dataset(REPO_NAME, \"tools\", split=\"banking\")\n",
        "investment_personas = load_dataset(REPO_NAME, \"personas\", split=\"investment\")\n",
        "healthcare_scenarios = load_dataset(REPO_NAME, \"adaptive_tool_use\", split=\"healthcare\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 19,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{'description': 'Retrieves comprehensive account balance information including '\n",
            "                'current balance, available balance, pending transactions, and '\n",
            "                \"recent activity summary for a customer's bank account.\",\n",
            " 'properties': {'account_number': {'description': 'The bank account number for '\n",
            "                                                  'which balance information '\n",
            "                                                  'is requested.',\n",
            "                                   'title': 'Account_Number',\n",
            "                                   'type': 'string'},\n",
            "                'account_type': {'description': 'The type of bank account to '\n",
            "                                                'check balance for.',\n",
            "                                 'enum': ['checking',\n",
            "                                          'savings',\n",
            "                                          'credit',\n",
            "                                          'money_market',\n",
            "                                          'cd'],\n",
            "                                 'title': 'Account_Type',\n",
            "                                 'type': 'string'},\n",
            "                'include_pending': {'description': 'Whether to include pending '\n",
            "                                                   'transactions in the '\n",
            "                                                   'balance calculation.',\n",
            "                                    'title': 'Include_Pending',\n",
            "                                    'type': 'boolean'},\n",
            "                'transaction_days': {'description': 'Number of days of recent '\n",
            "                                                    'transaction history to '\n",
            "                                                    'include in the summary.',\n",
            "                                     'title': 'Transaction_Days',\n",
            "                                     'type': 'integer'}},\n",
            " 'required': ['account_number', 'account_type'],\n",
            " 'response_schema': {'description': 'Account balance information with current '\n",
            "                                    'balance, available funds, and transaction '\n",
            "                                    'summary.',\n",
            "                     'properties': {'available_balance': {'description': 'The '\n",
            "                                                                         'available '\n",
            "                                                                         'balance '\n",
            "                                                                         'amount '\n",
            "                                                                         'that '\n",
            "                                                                         'can '\n",
            "                                                                         'be '\n",
            "                                                                         'withdrawn '\n",
            "                                                                         'or '\n",
            "                                                                         'spent.',\n",
            "                                                          'type': 'number'},\n",
            "                                    'current_balance': {'description': 'The '\n",
            "                                                                       'current '\n",
            "                                                                       'account '\n",
            "                                                                       'balance '\n",
            "                                                                       'amount.',\n",
            "                                                        'type': 'number'},\n",
            "                                    'pending_transactions_count': {'description': 'The '\n",
            "                                                                                  'number '\n",
            "                                                                                  'of '\n",
            "                                                                                  'pending '\n",
            "                                                                                  'transactions '\n",
            "                                                                                  'affecting '\n",
            "                                                                                  'the '\n",
            "                                                                                  'account.',\n",
            "                                                                   'type': 'integer'}},\n",
            "                     'required': ['current_balance',\n",
            "                                  'available_balance',\n",
            "                                  'pending_transactions_count'],\n",
            "                     'type': 'object'},\n",
            " 'title': 'get_account_balance',\n",
            " 'type': 'object'}\n"
          ]
        }
      ],
      "source": [
        "def convert_tool_json_strings(tool_record):\n",
        "    \"\"\"Return a dict copy of a tool record with its JSON-encoded fields decoded.\n",
        "\n",
        "    The 'properties' and 'response_schema' values are parsed with json.loads\n",
        "    when they are present and are strings; every other entry is copied as-is.\n",
        "    The input record itself is not modified.\n",
        "    \"\"\"\n",
        "    decoded = dict(tool_record)\n",
        "\n",
        "    for field in ('properties', 'response_schema'):\n",
        "        value = decoded.get(field)\n",
        "        if isinstance(value, str):\n",
        "            decoded[field] = json.loads(value)\n",
        "\n",
        "    return decoded\n",
        "\n",
        "# Decode the JSON-string fields of the first downloaded banking tool and pretty-print it.\n",
        "pprint(convert_tool_json_strings(banking_tools[0]))"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 20,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "{'name': 'Richard Chen',\n",
              " 'age': 58,\n",
              " 'occupation': 'Semi-retired Financial Advisor and Part-time Consultant',\n",
              " 'personality_traits': ['methodical', 'skeptical', 'detail-oriented'],\n",
              " 'tone': 'formal',\n",
              " 'detail_level': 'comprehensive'}"
            ]
          },
          "execution_count": 20,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Display the first record of the downloaded 'personas' config, 'investment' split.\n",
        "investment_personas[0]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 21,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "{'persona_index': 2,\n",
              " 'first_message': \"I need some help managing my healthcare situation. I was just diagnosed with Type 2 diabetes last week, and I need to find an endocrinologist who accepts Blue Cross insurance and can see me before my conference trip to Boston on May 14th-18th. I also need to get my recent A1C test results from April 28th to bring to the appointment, and set up medication reminders for my new Metformin prescription (500mg twice daily with meals). Could you also find clinical trials for diabetic neuropathy within 25 miles of my home in Cambridge? I'd like to leave feedback about Dr. Patel's dismissive attitude during my diagnosis appointment last Thursday at 2:30pm too. Oh, and my daughter mentioned there's a diabetes management program at the hospital - can you check if it's covered by my insurance plan?\",\n",
              " 'user_goals': ['Find and schedule an appointment with an endocrinologist who accepts Blue Cross insurance before May 14th',\n",
              "  'Retrieve A1C test results from April 28th',\n",
              "  'Set up medication reminders for Metformin 500mg twice daily with meals',\n",
              "  'Search for clinical trials for diabetic neuropathy within 25 miles of Cambridge',\n",
              "  \"Submit feedback about Dr. Patel's appointment on Thursday at 2:30pm\",\n",
              "  \"Check if the hospital's diabetes management program is covered by the patient's insurance plan\"]}"
            ]
          },
          "execution_count": 21,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "# Display the first record of the downloaded 'adaptive_tool_use' config, 'healthcare' split.\n",
        "healthcare_scenarios[0]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python (al)",
      "language": "python",
      "name": "al"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.12.10"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 2
}
